In [1]:
from __future__ import print_function
In [60]:
from IPython.html.services.config import ConfigManager
from IPython.utils.path import locate_profile
cm = ConfigManager(profile_dir=locate_profile(get_ipython().profile))
cm.update('livereveal', {
'width': 1024,
'height': 768,
})
Out[60]:
In [2]:
# Enable plots in the notebook
%matplotlib inline
import matplotlib.pyplot as plt
# Seaborn makes our plots prettier
import seaborn
seaborn.set(style='ticks')
# Import the audio playback widget
from IPython.display import Audio
# These are generally useful to have around
import numpy as np
import scipy
import scipy.signal
import mir_eval
In [3]:
import librosa
In [4]:
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = (12, 4)
mpl.rcParams['figure.autolayout'] = True
librosa.load() returns the audio time series and its sampling rate.
By default, all audio is resampled to 22050 Hz and mixed down to mono.
In [5]:
y, sr = librosa.load(librosa.util.example_audio_file())
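These defaults can be overridden; for instance, sr=None preserves the file's native sampling rate and mono=False keeps the original channels (a quick sketch):
# Keep the file's native sampling rate and channel layout
y_native, sr_native = librosa.load(librosa.util.example_audio_file(),
                                   sr=None, mono=False)
print(sr_native)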
In [6]:
print(type(y), type(sr))
In [7]:
print(y.shape, sr)
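The number of samples divided by the sampling rate gives the duration in seconds; librosa.get_duration computes the same thing:
print(len(y) / float(sr))
print(librosa.get_duration(y=y, sr=sr))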
In [8]:
# Illustrate STFT framing: a window slides over the signal with hop_length samples between frames
window = np.pad(scipy.signal.hann(2048, sym=False), [1024, 1024], mode='constant')

plt.subplot(3, 1, 1)
plt.plot(y[1024 + 4096:1024 + 8192], alpha=0.75, label='y')
plt.plot(window * y[1024 + 4096:1024 + 8192].max() / (1.05 * window.max()),
         alpha=0.9, color='r', label='window')
plt.xticks([])
plt.yticks([])
plt.legend(frameon=True)
plt.axis('tight')

plt.subplot(3, 1, 2)
plt.plot(y[1024 + 4096:1024 + 8192] * window, alpha=0.75, label='y * window')
plt.vlines([1024, 2048 + 1024],
           y[1024 + 4096:1024 + 8192].min(),
           y[1024 + 4096:1024 + 8192].max())
plt.annotate('', xy=(1024, -0.05), xytext=(1024 + 2048, -0.05),
             arrowprops=dict(arrowstyle="<->"))
plt.text(1024 + 384, -0.065, 'n_fft')
plt.xticks([])
plt.yticks([])
plt.ylabel('Frame $n$')
plt.legend(frameon=True)
plt.axis('tight')

plt.subplot(3, 1, 3)
plt.plot(y[1024 + 4096:1024 + 8192] * np.roll(window, 512), alpha=0.75,
         label='y * window (shifted)')
plt.vlines([1024, 2048 + 1024],
           y[1024 + 4096:1024 + 8192].min(),
           y[1024 + 4096:1024 + 8192].max(),
           alpha=0.25)
plt.ylabel('Frame $n+1$')
plt.annotate('', xy=(1024, 0.05), xytext=(1024 + 512, 0.05),
             arrowprops=dict(arrowstyle="<->"))
plt.text(1024 + 96, 0.065, 'hop_length')
plt.vlines([512 + 1024, 512 + 2048 + 1024],
           y[1024 + 4096:512 + 1024 + 8192].min(),
           y[1024 + 4096:512 + 1024 + 8192].max())
plt.legend(frameon=True)
plt.yticks([])
plt.xlabel('Samples')
plt.axis('tight')
plt.savefig('frames.png')
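The framing illustrated above can also be done explicitly with librosa.util.frame (a sketch; frame_length and hop_length here match the figure):
# Slice y into overlapping frames: each column is one frame
frames = librosa.util.frame(y, frame_length=2048, hop_length=512)
print(frames.shape)  # (frame_length, number of frames)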
We can play back audio with the IPython Audio widget.
In [9]:
Audio(data=y, rate=sr)
Out[9]:
The display module can plot waveforms.
In [82]:
librosa.display.waveplot(y, sr);
In [11]:
D = librosa.stft(y)
STFT spectra are complex-valued:
$D = S e^{j\phi}$
In [12]:
print(D.shape, D.dtype)
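The shape follows from the analysis parameters (a quick check, assuming the defaults n_fft=2048, hop_length=512, and centered framing):
print(1 + 2048 // 2)      # frequency bins for n_fft=2048
print(1 + len(y) // 512)  # frames for hop_length=512 with centered padding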
Magnitude and phase can be separated and recombined:
In [13]:
S, phase = librosa.magphase(D)
print(np.allclose(D, S * phase))
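The transform is also (approximately) invertible: librosa.istft resynthesizes a waveform from D. A quick sketch; the round trip is only exact up to windowing and length effects:
y_hat = librosa.istft(D)
print(y.shape, y_hat.shape)
print(np.max(np.abs(y[:len(y_hat)] - y_hat)))  # reconstruction error should be small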
The display module can show spectrograms too.
In [81]:
log_power = librosa.logamplitude(np.abs(D)**2, ref_power=np.max)
librosa.display.specshow(log_power, x_axis='time', y_axis='linear')
plt.colorbar();
All the action is in the bottom of the image!
We can fix this with a log-frequency axis.
In [80]:
librosa.display.specshow(log_power, x_axis='time', y_axis='log')
plt.colorbar();
What if I want a direct log-frequency analysis?
Instead of the STFT, we can use a constant-Q transform (CQT).
In [16]:
C = librosa.cqt(y, sr)
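cqt also accepts tuning parameters such as fmin and n_bins; for example (a sketch; this particular range is an arbitrary choice):
# A 5-octave CQT starting at C2 (12 bins per octave by default)
C_low = librosa.cqt(y, sr=sr, fmin=librosa.note_to_hz('C2'), n_bins=60)
print(C_low.shape)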
In [79]:
librosa.display.specshow(librosa.logamplitude(C**2),
                         x_axis='time', y_axis='cqt_hz')
plt.colorbar();
By default, logamplitude clips everything more than 80 dB below the peak.
Reducing top_db gives a sparser image.
In [78]:
librosa.display.specshow(librosa.logamplitude(C**2, top_db=40),
                         x_axis='time', y_axis='cqt_note')
plt.colorbar();
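top_db clips everything more than the given number of dB below the peak, so the dynamic range of the result is bounded (a quick check):
log_C = librosa.logamplitude(C**2, top_db=40)
print(log_C.max() - log_C.min())  # at most 40 dB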
In [19]:
chroma = librosa.feature.chroma_cqt(C=C, sr=sr)
In [77]:
librosa.display.specshow(chroma, x_axis='time', y_axis='chroma')
plt.colorbar();
Other spectral features include Mel spectra, MFCC, and Tonnetz.
In [21]:
M = librosa.feature.melspectrogram(y=y, sr=sr)
MFCC = librosa.feature.mfcc(y=y, sr=sr)
tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
... and lots more in the feature module.
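A couple of other examples (a sketch; spectral_centroid and zero_crossing_rate are standard members of the feature module):
centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
zcr = librosa.feature.zero_crossing_rate(y)
print(centroid.shape, zcr.shape)  # one value per frame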
In [22]:
plt.subplot(3,1,1)
librosa.display.specshow(librosa.logamplitude(M), y_axis='mel')
plt.ylabel('Mel spectra')
plt.subplot(3,1,2)
librosa.display.specshow(MFCC)
plt.ylabel('MFCC')
plt.subplot(3,1,3)
librosa.display.specshow(tonnetz, x_axis='time', y_axis='tonnetz')
plt.ylabel('Tonnetz')
Out[22]:
In [23]:
y_harmonic, y_percussive = librosa.effects.hpss(y)
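effects.hpss operates on the waveform; if you already have an STFT, a similar separation can be done on the spectrogram itself (a sketch using librosa.decompose.hpss):
# Median-filtering based harmonic/percussive separation of the STFT
D_harmonic, D_percussive = librosa.decompose.hpss(D)
print(D_harmonic.shape, D_percussive.shape)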
In [24]:
Audio(data=y, rate=sr)
Out[24]:
In [25]:
Audio(data=y_harmonic, rate=sr)
Out[25]:
In [26]:
Audio(data=y_percussive, rate=sr)
Out[26]:
Now compare the full, harmonic, and percussive CQTs:
In [54]:
C_harmonic = librosa.cqt(y_harmonic, sr)
C_perc = librosa.cqt(y_percussive, sr)
In [71]:
plt.figure(figsize=(12,6))
plt.subplot(3,1,1), librosa.display.specshow(C**(1./3), y_axis='cqt_hz'), plt.ylabel('Full')
plt.subplot(3,1,2), librosa.display.specshow(C_harmonic**(1./3), y_axis='cqt_hz'), plt.ylabel('Harmonic')
plt.subplot(3,1,3), librosa.display.specshow(C_perc**(1./3), y_axis='cqt_hz', x_axis='time'), plt.ylabel('Percussive');
In [47]:
onset_envelope = librosa.onset.onset_strength(y, sr)
Onset events can be detected:
In [30]:
onsets = librosa.onset.onset_detect(onset_envelope=onset_envelope)
In [83]:
plt.subplot(2, 1, 1)
plt.plot(onset_envelope, label='Onset strength')
plt.vlines(onsets, 0, onset_envelope.max(), color='r', alpha=0.25, label='Onsets')
plt.xticks([]), plt.yticks([])
plt.legend(frameon=True)
plt.axis('tight')
plt.subplot(2, 1, 2)
librosa.display.waveplot(y, sr);
Onset strength is used to track beats and estimate tempo:
In [32]:
tempo, beats = librosa.beat.beat_track(onset_envelope=onset_envelope)
In [33]:
print(tempo)
In [84]:
plt.plot(onset_envelope, label='Onset strength')
plt.vlines(beats, 0, onset_envelope.max(), color='r', alpha=0.25, label='Beats')
plt.xticks([]), plt.yticks([])
plt.legend(frameon=True)
plt.axis('tight');
Beat events are in frame indices.
We can convert to time (in seconds), and sonify with mir_eval.
In [35]:
beat_times = librosa.frames_to_time(beats)
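As a quick sanity check (not part of the analysis itself), the median inter-beat interval should be close to the period implied by the tempo estimate:
print(60.0 / tempo)                    # seconds per beat from the tempo estimate
print(np.median(np.diff(beat_times)))  # median inter-beat interval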
In [36]:
y_click = mir_eval.sonify.clicks(beat_times, sr, length=len(y))
In [37]:
Audio(data=y + y_click, rate=sr)
Out[37]:
Beat-synchronous feature aggregation:
In [48]:
c_sync = librosa.feature.sync(chroma, beats, aggregate=np.median)
In [74]:
librosa.display.specshow(c_sync, y_axis='chroma')
plt.colorbar();
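The same aggregation works for any frame-wise feature and shrinks the time axis from frames to beats (a sketch using the MFCCs computed earlier):
mfcc_sync = librosa.feature.sync(MFCC, beats, aggregate=np.mean)
print(MFCC.shape, mfcc_sync.shape)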
History embedding (stacking each frame with its recent past) can add temporal context:
In [50]:
chroma_stack = librosa.feature.stack_memory(c_sync, n_steps=3, mode='edge')
In [75]:
librosa.display.specshow(chroma_stack, y_axis='chroma');
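With n_steps=3, each column now carries its two predecessors as well, so the feature dimension triples:
print(c_sync.shape, chroma_stack.shape)  # 12 chroma rows become 36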
Recurrence plots show nearest neighbor linkage for each frame.
Chroma recurrence can encode harmonic repetitions.
In [52]:
R = librosa.segment.recurrence_matrix(chroma_stack, sym=True)
In [76]:
# Diagonal lines indicate repeated progressions
librosa.display.specshow(R, aspect='equal');
Post-processing $R$ can reveal structural components, metrical structure, and more.
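For example, a simple smoothing pass can suppress isolated spurious links before further analysis (a sketch; the median filter and its size are arbitrary choices, not a librosa recipe):
import scipy.ndimage

# Median-filter the recurrence matrix to remove isolated links
R_smooth = scipy.ndimage.median_filter(R.astype(float), size=(1, 7))
librosa.display.specshow(R_smooth, aspect='equal');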